package cn.rslee.scala.spark.demos

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

/**
 * Real-time word count demo built on Spark Streaming, adapted from the
 * official network word count example with a few tweaks.
 *
 * Reads lines of text from a TCP socket on localhost:9999, splits each line
 * on single spaces and prints the per-batch word counts every 5 seconds.
 *
 * @author rslee
 */
object NetWorkCount {

  /**
   * Builds the streaming word count pipeline, starts it and blocks until the
   * streaming context terminates.
   *
   * @param args command-line arguments; currently unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NetWordCount")

    // Fallback master, used only when none was supplied from outside.
    val master = "local[2]"

    // SparkConf keeps the master URL under the key "spark.master" (that is the
    // key setMaster writes and the one populated from "spark.*" system
    // properties). Looking up plain "master" never matches, which would make
    // the local fallback silently override an externally configured master.
    if (!conf.contains("spark.master")) {
      conf.setMaster(master)
    }

    // 5-second batch interval.
    val ssc = new StreamingContext(conf, Seconds(5))

    val lines = ssc.socketTextStream("localhost", 9999)

    // Blank lines and runs of consecutive spaces make split(" ") produce empty
    // tokens; drop them so the empty string is not counted as a word.
    val wordCount = lines
      .flatMap(line => line.split(" "))
      .filter(word => word.nonEmpty)
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // Print the counts computed for each batch.
    wordCount.print()

    ssc.start()
    ssc.awaitTermination()
  }

}